In [3]:
import pandas as pd
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
In [ ]:
#Jiawei Yang: jiy020@ucsd.edu
#Code Sample 2: Visualization of secondhand real-estate online trading activity by U.S. state
In [4]:
import bs4
import requests
In [5]:
# Read the pre-processed state-frequency CSV and keep just the two columns
# used downstream (state name and its frequency).
state_freq = (
    pd.read_csv('state_freq.csv')
    .loc[:, ['state_name', 'freq']]
)
# Peek at the first three rows as a sanity check.
state_freq.head(n=3)
Out[5]:
state_name freq
0 Arizona 7
1 Arkansas 1
2 California 51
In [6]:
#Web scrape U.S. state abbreviation
# Builds abbr_holder: lower-cased state name -> two-character abbreviation,
# read from the first and third <td> cells of each table row on the page.
abbr_holder = {}
res = requests.get(
    'https://www.bu.edu/brand/guidelines/editorial-style/us-state-abbreviations/',
    timeout=30,  # do not let an unresponsive server hang the notebook
)
# Fail loudly on an HTTP error instead of parsing an error page.
res.raise_for_status()
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified" warning).
soup = bs4.BeautifulSoup(res.text, 'html.parser')
artcl = soup.find('article', {'class': 'content-area post-116 page type-page status-publish hentry'})
if artcl is None:
    raise RuntimeError('state-abbreviation <article> element not found; the page layout may have changed')

for entry in artcl.find_all('tr'):
    elements = entry.find_all('td')
    # Rows with fewer than three <td> cells cannot carry an abbreviation
    # in the third column, so skip them rather than raising IndexError.
    if len(elements) < 3:
        continue
    # Ignore padding whitespace around the cell text.
    abbr = elements[2].text.strip()
    #abbreviations are of len 2
    if len(abbr) == 2:
        abbr_holder[elements[0].text.strip().lower()] = abbr
In [7]:
#match abbr by full name
# abbr_holder is keyed by lower-cased state name, so normalize the names the
# same way (also ignoring padding whitespace) before looking them up.
state_abbr = state_freq['state_name'].str.strip().str.lower().map(abbr_holder)
# Collect every name that has no abbreviation so the error reports all of
# them at once instead of stopping at the first failed lookup.
unmatched = sorted(state_freq.loc[state_abbr.isna(), 'state_name'].astype(str).unique())
if unmatched:
    raise KeyError('no abbreviation found for: ' + ', '.join(unmatched))
state_freq['state_abbr'] = state_abbr
In [8]:
# Choropleth trace specification: one entry per row of state_freq, located by
# state abbreviation and coloured by the 'freq' column.
freq_dt = {
    'type': 'choropleth',
    'locations': state_freq['state_abbr'],
    'locationmode': 'USA-states',
    'colorscale': 'mint',
    'text': state_freq['state_name'],
    'z': state_freq['freq'],
    'colorbar': dict(title='bargain counts'),
}
In [9]:
# Figure layout: restrict the geo axes to the 'usa' scope.
layout = {'geo': dict(scope='usa')}
In [10]:
# Assemble the figure from the single choropleth trace and the geo layout.
choromap = go.Figure(
    layout=layout,
    data=[freq_dt],
)
In [11]:
# Render the choropleth figure built in the previous cell.
choromap.show()
In [12]:
# Show the raw 'freq' column values as an array.
state_freq.loc[:, 'freq'].values
Out[12]:
array([ 7,  1, 51,  5,  6,  5,  2, 12, 11, 16,  1,  2,  1,  1,  1,  3,  3,
        1,  5,  5,  1,  1,  1, 21,  1, 12, 23,  1])
In [ ]: